In this notebook, we are using the tmb_genomic.tsv file
generated from the 01-preprocess-data.Rmd script.
suppressPackageStartupMessages({
library(tidyverse)
})
# Locate the project root by searching for the directory that contains the
# ".git" folder, so every path below resolves the same way no matter where
# this notebook is run from.
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
scratch_dir <- file.path(root_dir, "scratch")
analysis_dir <- file.path(root_dir, "analyses", "tmb-vaf-longitudinal")
input_dir <- file.path(analysis_dir, "input")

# Input files
tmb_genomic_file <- file.path(scratch_dir, "tmb_genomic.tsv")
tumor_descriptor_color_palette_file <- file.path(
  root_dir, "figures", "palettes", "tumor_descriptor_color_palette.tsv"
)

# Plots directory; create it (together with any missing parent) on first run
plots_dir <- file.path(analysis_dir, "plots")
if (!dir.exists(plots_dir)) {
  dir.create(plots_dir, recursive = TRUE)
}

# Source the analysis-specific plotting script and the shared figure theme.
# Paths are assembled with file.path(), like every other path in this section,
# rather than by pasting "/" separators by hand.
source(file.path(analysis_dir, "util", "function-create-barplot.R"))
source(file.path(root_dir, "figures", "scripts", "theme.R"))
# Load the genomic TMB table
tmb_genomic_all <- readr::read_tsv(
  tmb_genomic_file,
  guess_max = 100000,
  show_col_types = FALSE
)

# Are there any samples with both WGS and WXS?
# List, per participant, every experimental strategy present in the table.
tmb_genomic_all %>%
  unique() %>%
  arrange(Kids_First_Participant_ID, experimental_strategy) %>%
  group_by(Kids_First_Participant_ID) %>%
  dplyr::summarise(
    experimental_strategy_sum = str_c(experimental_strategy, collapse = ";")
  )

# Factor that fixes the order of the timepoints
f <- factor(
  c(tmb_genomic_all$tumor_descriptor),
  levels = c(
    "Diagnosis", "Progressive", "Recurrence",
    "Deceased", "Unavailable", "Second Malignancy"
  )
)

# Yes, some participants have both WGS and WXS, so drop the WXS rows from
# downstream analyses and build a histology-prefixed patient identifier.
tmb_genomic <- tmb_genomic_all %>%
  filter(experimental_strategy != "WXS") %>%
  mutate(
    patient_id = paste(short_histology, Kids_First_Participant_ID, sep = "_")
  )

# Load the color palette for the tumor descriptors
tumor_descriptor_color_palette <- readr::read_tsv(
  tumor_descriptor_color_palette_file,
  guess_max = 100000,
  show_col_types = FALSE
)
We will explore TMB per Kids_First_Participant_ID over
time by creating stacked barplots.
# Parameters handed to create_stacked_barplot() below
ylim <- 360
tmb_df <- tmb_genomic

# Output file for the plot; built with file.path() for consistency with the
# other paths in this notebook (same resulting path as pasting with "/").
fname <- file.path(plots_dir, "TMB-genomic.pdf")
print(fname)
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/TMB-genomic.pdf"
# Build the stacked barplot from the data frame and y-limit defined above
p <- create_stacked_barplot(
  tmb_df = tmb_df,
  ylim = ylim
)
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.
# Open a PDF device at fname, draw the plot into it, then close the device
grDevices::pdf(file = fname, width = 15, height = 6)
print(p)
grDevices::dev.off()
quartz_off_screen
2
Attention: Hypermutant TMB is defined as ≥10 mutations/Mb, and ultra-hypermutant TMB is defined as ≥100 mutations/Mb (https://pubmed.ncbi.nlm.nih.gov/29056344/).
Here, we notice that some samples have high TMB (hypermutant samples). Next, we will exclude these samples (TMB ≥ 10 mutations/Mb) from downstream analysis. Care is needed when a patient has a high number of mutations at only one timepoint, as removing that sample leaves unmatched longitudinal samples. We will also remove those cases, so that we always have matched longitudinal samples.
# Filter df: drop hypermutant samples (tmb >= 10), then keep only participants
# whose remaining timepoints are more than a single Diagnosis, Progressive or
# Recurrence entry.
tmb_genomic_filter <- tmb_genomic %>%
  filter(tmb < 10) %>%
  unique() %>%
  arrange(Kids_First_Participant_ID, tumor_descriptor) %>%
  group_by(Kids_First_Participant_ID) %>%
  dplyr::summarise(
    tumor_descriptor_sum = str_c(tumor_descriptor, collapse = ";")
  ) %>%
  filter(!tumor_descriptor_sum %in% c("Diagnosis", "Progressive", "Recurrence")) %>%
  # Re-attach the sample-level rows. Participants whose remaining timepoint
  # string does not equal the tumor_descriptor_sum stored in tmb_genomic get
  # no match, so their tmb is NA and they are removed by drop_na() below.
  # NOTE(review): the join pulls rows from the unfiltered tmb_genomic, so a
  # participant whose timepoint string still matches would get all of their
  # rows back, including any with tmb >= 10 -- confirm this cannot happen.
  dplyr::left_join(
    tmb_genomic,
    by = c("Kids_First_Participant_ID", "tumor_descriptor_sum")
  ) %>%
  # Collapse histologies into the two glioma groups versus everything else;
  # rows with a missing short_histology are labelled "Other".
  mutate(
    cancer_group_sum = case_when(
      is.na(short_histology) ~ "Other",
      short_histology == "HGAT" ~ "High-grade glioma",
      short_histology == "LGAT" ~ "Low-grade glioma",
      TRUE ~ "Other cancer group"
    )
  ) %>%
  drop_na(tmb)
# Parameters handed to create_stacked_barplot() below
ylim <- 12.5
tmb_df <- tmb_genomic_filter

# Output file for the plot; built with file.path() for consistency with the
# other paths in this notebook (same resulting path as pasting with "/").
fname <- file.path(plots_dir, "TMB-genomic-no-hypermutants.pdf")
print(fname)
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/TMB-genomic-no-hypermutants.pdf"
# Build the stacked barplot from the data frame and y-limit defined above,
# then write it to the PDF at fname
p <- create_stacked_barplot(
  tmb_df = tmb_df,
  ylim = ylim
)
grDevices::pdf(file = fname, width = 25, height = 8)
print(p)
grDevices::dev.off()
quartz_off_screen
2
We will explore TMB per cancer group over time by creating dumbbell plots. We grouped the samples into the cancer types with the highest number of samples (high- and low-grade gliomas) versus all other cancer groups.
# Distinct cancer group labels, in ascending alphabetical order
cancer_groups <- tmb_genomic_filter$cancer_group_sum %>%
  as.character() %>%
  unique() %>%
  sort()
print(cancer_groups)
[1] "High-grade glioma" "Low-grade glioma" "Other cancer group"
# One TMB dumbbell plot per cancer group, each written to its own PDF
for (i in seq_along(cancer_groups)) {
  print(i)

  # Rows belonging to the current cancer group
  df_ct_sub <- tmb_genomic_filter %>%
    filter(cancer_group_sum == cancer_groups[i])
  print(cancer_groups[i])

  # Define parameters for function: the first group in cancer_groups gets a
  # y-limit of 8, every other group gets 4
  ylim <- if (i == 1) 8 else 4

  # Name plots
  fname <- paste0(plots_dir, "/", "TMB-genomic-dumbbell", "-", cancer_groups[i], ".pdf")
  print(fname)

  # Run function
  p <- create_dumbbell_ct(
    tmb_df = df_ct_sub,
    ylim = ylim,
    ct_id = cancer_groups[i]
  )

  grDevices::pdf(file = fname, width = 18, height = 10)
  print(p)
  grDevices::dev.off()
}
[1] 1
[1] "High-grade glioma"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/TMB-genomic-dumbbell-High-grade glioma.pdf"
[1] 2
[1] "Low-grade glioma"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/TMB-genomic-dumbbell-Low-grade glioma.pdf"
[1] 3
[1] "Other cancer group"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/TMB-genomic-dumbbell-Other cancer group.pdf"
# One mutation-count dumbbell plot per cancer group, each written to its own PDF
for (i in seq_along(cancer_groups)) {
  print(i)

  # Rows belonging to the current cancer group
  df_ct_sub <- tmb_genomic_filter %>%
    filter(cancer_group_sum == cancer_groups[i])
  print(cancer_groups[i])

  # Define parameters for function: the first group in cancer_groups gets a
  # y-limit of 260, every other group gets 150
  ylim <- if (i == 1) 260 else 150

  # Name plots
  fname <- paste0(plots_dir, "/", "Mutations-genomic-dumbbell", "-", cancer_groups[i], ".pdf")
  print(fname)

  # Run function
  p <- create_dumbbell_ct_mut(
    tmb_df = df_ct_sub,
    ylim = ylim,
    ct_id = cancer_groups[i]
  )

  grDevices::pdf(file = fname, width = 18, height = 10)
  print(p)
  grDevices::dev.off()
}
[1] 1
[1] "High-grade glioma"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/Mutations-genomic-dumbbell-High-grade glioma.pdf"
[1] 2
[1] "Low-grade glioma"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/Mutations-genomic-dumbbell-Low-grade glioma.pdf"
[1] 3
[1] "Other cancer group"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/Mutations-genomic-dumbbell-Other cancer group.pdf"
Here, we want to explore the number of mutations per timepoint and biospecimen sample per patient case.
# Distinct participant IDs that remain after filtering
samples <- tmb_genomic_filter$Kids_First_Participant_ID %>%
  as.character() %>%
  unique()
print(samples)
[1] "PT_00G007DM" "PT_02J5CWN5" "PT_1H2REHT2" "PT_1ZAWNGWT" "PT_25Z2NX27" "PT_2ECVKTTQ" "PT_2FVTD0WR" "PT_2YT37G8P"
[9] "PT_37B5JRP1" "PT_3R0P995B" "PT_3T3VGWC6" "PT_3VCS1PPF" "PT_7M2PGCBV" "PT_82MX6J77" "PT_89XRZBSG" "PT_8GN3TQRM"
[17] "PT_962TCBVR" "PT_98QMQZY7" "PT_99S5BPE3" "PT_9PJR0ZK7" "PT_9S6WMQ92" "PT_AQWDQW27" "PT_CXT81GRM" "PT_DFQAH7RS"
[25] "PT_ESHACWF6" "PT_FN4GEEFR" "PT_HFQNKP5X" "PT_HJMP6PH2" "PT_JNEV57VK" "PT_JP1FDKN9" "PT_JSFBMK5V" "PT_K8ZV7APT"
[33] "PT_KMHGNCNR" "PT_MDWPRDBT" "PT_MNSEJCDM" "PT_N8W26H19" "PT_NPETR8RY" "PT_PFA762TK" "PT_PR4YBBH3" "PT_QH9H491G"
[41] "PT_RJ1TJ2KH" "PT_S2SQJVGK" "PT_S4YNE17X" "PT_T2M1338J" "PT_TKWTTRQ7" "PT_W6AWJJK7" "PT_WP871F5S" "PT_XA98HG1C"
[49] "PT_XHYBZKCX" "PT_XTVQB9S4" "PT_YGN06RPZ" "PT_Z4GS3ZQQ" "PT_ZMKMKCFQ" "PT_ZZRBX5JT"
# Participants that need a taller y-axis than the default of 50.
# These tiers used to be selected by position in `samples`, but positions shift
# whenever the set or order of participants changes, and the hard-coded
# positions no longer pointed at the participants they were documented to
# select. Selecting by participant ID keeps each limit attached to the
# intended participant.
samples_ylim_260 <- c(
  "PT_9S6WMQ92", "PT_ESHACWF6", "PT_JNEV57VK",
  "PT_TKWTTRQ7", "PT_N8W26H19", "PT_2YT37G8P"
)
samples_ylim_100 <- c(
  "PT_37B5JRP1", "PT_CXT81GRM", "PT_JSFBMK5V", "PT_XA98HG1C",
  "PT_N8W26H19", "PT_89XRZBSG", "PT_99S5BPE3", "PT_8GN3TQRM",
  "PT_HFQNKP5X", "PT_MDWPRDBT", "PT_9PJR0ZK7", "PT_1H2REHT2"
)

# One barplot per participant, each written to its own PDF
for (i in seq_along(samples)) {
  print(i)

  # Rows belonging to the current participant
  tmb_sub <- tmb_genomic_filter %>%
    filter(Kids_First_Participant_ID == samples[i])
  print(samples[i])

  # Define parameters for function. The 260 tier is checked first, so an ID
  # listed in both tiers (PT_N8W26H19) gets 260.
  if (samples[i] %in% samples_ylim_260) {
    ylim <- 260
  } else if (samples[i] %in% samples_ylim_100) {
    ylim <- 100
  } else {
    ylim <- 50
  }

  # Run function
  fname <- file.path(plots_dir, paste0(samples[i], "-TMB-barplot.pdf"))
  print(fname)
  p <- create_barplot_sample(
    tmb_df = tmb_sub,
    ylim = ylim,
    sid = samples[i]
  )

  pdf(file = fname, width = 5, height = 4)
  print(p)
  dev.off()
}
[1] 1
[1] "PT_00G007DM"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_00G007DM-TMB-barplot.pdf"
[1] 2
[1] "PT_02J5CWN5"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_02J5CWN5-TMB-barplot.pdf"
[1] 3
[1] "PT_1H2REHT2"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_1H2REHT2-TMB-barplot.pdf"
[1] 4
[1] "PT_1ZAWNGWT"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_1ZAWNGWT-TMB-barplot.pdf"
[1] 5
[1] "PT_25Z2NX27"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_25Z2NX27-TMB-barplot.pdf"
[1] 6
[1] "PT_2ECVKTTQ"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_2ECVKTTQ-TMB-barplot.pdf"
[1] 7
[1] "PT_2FVTD0WR"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_2FVTD0WR-TMB-barplot.pdf"
[1] 8
[1] "PT_2YT37G8P"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_2YT37G8P-TMB-barplot.pdf"
[1] 9
[1] "PT_37B5JRP1"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_37B5JRP1-TMB-barplot.pdf"
[1] 10
[1] "PT_3R0P995B"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_3R0P995B-TMB-barplot.pdf"
[1] 11
[1] "PT_3T3VGWC6"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_3T3VGWC6-TMB-barplot.pdf"
[1] 12
[1] "PT_3VCS1PPF"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_3VCS1PPF-TMB-barplot.pdf"
[1] 13
[1] "PT_7M2PGCBV"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_7M2PGCBV-TMB-barplot.pdf"
[1] 14
[1] "PT_82MX6J77"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_82MX6J77-TMB-barplot.pdf"
[1] 15
[1] "PT_89XRZBSG"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_89XRZBSG-TMB-barplot.pdf"
[1] 16
[1] "PT_8GN3TQRM"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_8GN3TQRM-TMB-barplot.pdf"
[1] 17
[1] "PT_962TCBVR"
[1] "/Users/chronia/CHOP/GitHub/pbta-tumor-evolution/analyses/tmb-vaf-longitudinal/plots/PT_962TCBVR-TMB-barplot.pdf"